/******************************************************************************
* building zip level covariates -- demographics data from ACS 
* ACS summary file as of 2015, which are avialble for download from American FactFinder (AFF)

* we use 2015 ACS 5-year table, at 5-digit zcta level

* Each ACS summary table is in a separate csv files in the package downloaded from AFF
* so we need to process them separately
*https://factfinder.census.gov/faces/affhelp/jsf/pages/metadata.xhtml?lang=en&type=dataset&id=dataset.en.ACS_15_5YR
*****************************************************************************/


clear all
set more off
global proj "/Users/hanyang/tu2"
global dataRoot "${proj}/Data/"



**
global saveNum 0

*clean AFF data
cap program drop cleanAFF
program define cleanAFF
	*relabel zip
	rename geoid2 zcta5
	label var zcta "zcta 5"
	
	*drop unncessary variable
	drop h*_*v*
	drop geodisplaylabel geoid
	
	compress
	global saveNum = ${saveNum}+1

end



*collapse and convert variables to percentages
cap program drop collapseGeo
program define collapseGeo
	args geo 
	
	
	*collapse at zip level
	replace inc_median = inc_median* total_hh
	replace inc_mean   = inc_mean  * total_hh
	replace inc_gini   = inc_gini  * total_hh
	replace house_median = house_median * total_own_house
	
	collapse (sum) total_population - house_median, by(`geo')
			
	
	replace inc_median = inc_median/ total_hh
	replace inc_mean   = inc_mean  / total_hh
	replace inc_gini   = inc_gini  / total_hh
	replace house_median = house_median / total_own_house
	
	*convert frequency to fractions
	foreach var of varlist job_labor_force - job_not_labor_force{
		replace `var' = 100*`var'/total_pop16
	}
	
	foreach var of varlist edu_bachelor - edu_advanced{
		replace `var' = 100*`var'/total_pop25
	}
	
	
	foreach var of varlist covered_any{
		replace `var' = 100*`var'/total_pop
	}
	
	foreach var of varlist own_vehicle{
		replace `var' = 100*`var'/total_worker16
	}
	
	foreach var of varlist house_own house_rent{
		replace `var' = 100*`var'/total_house
	}
	
	
	*label vars
	labelVar
		
	*summarize all variables
	sum *
end





*collapse and convert variables to percentages
cap program drop labelVar
program define labelVar
	
	cap label var total_population 		"total population"
	cap label var total_pop20to80 		"total population 20 - 80"
	
	
	cap label var total_pop16 		    "total population >=16"
	cap label var job_labor_force 		"civilian labor force >=16"
	cap label var job_unemployed  		"unemployed in civilian labor force >=16"
	cap label var job_not_labor_force  	"not in labor force >=16"
	
	cap label var total_pop25   	"total population >=25"
	cap label var edu_no_highschool "no highschool degree >=25"
	cap label var edu_highschool    "highschool degree or equivalent >=25"
	cap label var edu_somecollege   "some college >=25"
	cap label var edu_bachelor      "bachelor degree >=25"
	cap label var edu_advanced      "advanced degree >=25"
	
	cap label var total_hh      "Total n. households"
	cap label var inc_median "median households income (k)"
	cap label var inc_mean   "mean households income (k)"
	
	cap label var inc_gini		"households income gini"
	
	cap label var total_pop 		"total population"
	cap label var covered_any   	"population covered by insurance"
		
	cap label var total_worker16 	"total workers 16 or older"
	cap label var own_vehicle    	"own vehicles"
	
	
	cap label var total_house   "total housing unit"
	cap label var house_own     "owner-occupied houses"
	cap label var house_rent    "renter-occupied houses"
	
	cap label var house_median   "house median value (k)"
end


*******************************************************
* Age, -- ACS 2015
*******************************************************

*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_S0101_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear
	
*total population and population 20 to 80
rename hc01_est_vc01 total_population
gen total_pop20to80 = ///
	hc01_est_vc07 + hc01_est_vc08 + hc01_est_vc09 + hc01_est_vc10 + ///
	hc01_est_vc11 + hc01_est_vc12 + hc01_est_vc13 + hc01_est_vc14 + ///
	hc01_est_vc15 + hc01_est_vc16 + hc01_est_vc17 + hc01_est_vc18
replace total_pop20to80 = total_pop20to80*total_population/100


labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace



*******************************************************
* Employment, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_B23025_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear

*labor force and employment
rename hd01_vd01 total_pop16
rename hd01_vd03 job_labor_force
rename hd01_vd05 job_unemployed
rename hd01_vd07 job_not_labor_force

labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace


*******************************************************
* Education, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_B15003_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear

*drop margin of error variables
drop hd02*

*population and education attainment
rename hd01_vd01 total_pop25
egen edu_no_highschool = rowtotal(hd01_vd02 - hd01_vd16)
gen  edu_highschool    = hd01_vd17 + hd01_vd18
egen edu_somecollege   = rowtotal(hd01_vd19 - hd01_vd21)
rename hd01_vd22 edu_bachelor
egen edu_advanced      = rowtotal(hd01_vd23 - hd01_vd25)

labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace



*******************************************************
* Income, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_S1901_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear

*drop margin of error variables
drop *_moe_*

*median/mean income
rename hc01_est_vc01 total_hh
gen  inc_median = hc01_est_vc13/1000
gen  inc_mean   = hc01_est_vc15/1000
	
labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace



*******************************************************
* Income Inequality, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_B19083_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear

*households income
rename hd01_vd01 inc_gini

labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace




*******************************************************
* Healthcare, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_B27010_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear


*drop margin of error variables
drop hd02*

*population and insurance coverage
rename hd01_vd01 total_pop

*healhcare coverage 
egen not_covered      = rowtotal(hd01_vd17 hd01_vd33 hd01_vd50 hd01_vd66)
gen covered_any = total_pop - not_covered
drop not_covered

labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace



*******************************************************
* Vehicle, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_B08141_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear


*drop margin of error variables
drop hd02*

*workers 16 or older in households
gen total_worker16 = hd01_vd01

*own vehicles
gen own_vehicle = hd01_vd01 - hd01_vd03


labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace




*******************************************************
* Housing, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_S2504_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear


*drop margin of error variables
drop *moe*

gen total_house = hc01_est_vc01 

*ownership
gen house_own  = hc02_est_vc01
gen house_rent = hc03_est_vc01


labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace



*******************************************************
* Housing Value Detailed, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_B25075_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear

drop hd02_*
rename hd01_vd01 total_own_house


labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace

*******************************************************
* Housing Value, -- ACS 2015
*******************************************************
*load downloaded data from american factfinder
import delimited using "$dataRoot/Raw/ACS_15_5YR_B25077_with_ann", ///
	delimiters(",") rowrange(3) varnames(1) numericcols(4/999) clear

gen house_median = hd01_vd01
replace house_median = house_median/1000


labelVar

*save
cleanAFF
save  "$dataRoot/Raw/acs_2015_${saveNum}", replace



*******************************************************
* Aggregate, -- ACS 2015
*******************************************************

use  "$dataRoot/Raw/acs_2015_1", clear
forval f=1/$saveNum{
	merge 1:1 zcta5 using "$dataRoot/Raw/acs_2015_`f'", nogenerate
}

compress
rename zcta zip
destring zip, replace



*** collapse at zip level
preserve
	collapseGeo zip
	save "$dataRoot/clean/acs_2015_zip", replace
restore


*** collapse at cz level
*zip to county to cz
merge m:1 zip using "$dataRoot/Crosswalk/cw_zip_to_county", keepusing(zip_str countyfips*)
keep if _merge==3
drop _merge

rename countyfips county_id
merge m:1 county_id using "$dataRoot/Crosswalk/cw_cty00_cz.dta"
keep if _merge==3
drop _merge


preserve
	collapseGeo cz
	save "$dataRoot/clean/acs_2015_cz", replace
restore

*** collapse at state level

preserve
	rename state_id state
	collapseGeo state
	save "$dataRoot/clean/acs_2015_st", replace
restore

